********************************************************************************
*                                                                              *
*                    CREATE PANEL DATA OF FIRMS 2010-201X                      *
*                                                                              *
********************************************************************************

// Directory
cd "H:\"

// First and last year of observation
global minyear = 2009
global maxyear = 2021

// Turn on(1)/off(0) modules
global IHG = 1
global Investeringen = 0
global ICTbedrijven = 0

global minrobotinv = 2500

////////////////////////////////////////////////////////////////////////////////
//                      1) Compile IHG data (Int. Trade Register)             //
////////////////////////////////////////////////////////////////////////////////

if $IHG == 1 {
	import excel "Robots and Workers\Data\IHGCountryCodes.xlsx", sheet("Sheet1") firstrow clear
	rename (Code Country EU EEA) (land_hb country tradeEU tradeEEA)
	keep land_hb tradeEU
	save "Robots and Workers\Data\IGH_temp.dta", replace
		
	
	
	local d = 1
	qui forvalues year = $minyear(1)$maxyear {
		nois _dots `d' 0 
		
		// Open data from SPSS for different years
		if `year' == 2009 | `year' == 2010 | `year'== 2011 {
			import spss using "G:\Internationalehandel\IHG\\`year'\130620 IHG per BE_ID `year'V1.sav", case(lower) clear
		}
		if `year' == 2012 {
			import spss using "G:\Internationalehandel\IHG\\`year'\131018 IHG per BE_ID `year'V1.sav", case(lower) clear
		}
		if `year' == 2013 {
			import spss using "G:\Internationalehandel\IHG\\`year'\141117 IH per BE `year'V1.sav", case(lower) clear
		}
		if `year' == 2014 | `year' == 2015 | `year' == 2017 | `year' == 2018 | `year' == 2019 | `year' == 2020 {	
			import spss using "G:\Internationalehandel\IHG\\`year'\IHG_per_BE_ID`year'V1.sav", case(lower) clear
		}
		if `year' == 2016 {		
			import spss using "G:\Internationalehandel\IHG\\`year'\IHG_per_BE_ID`year'V2.sav", case(lower) clear
		}
		if `year' == 2021 {	
			import spss using "G:\Internationalehandel\IHG\\`year'\IHG_naar_BE_ID_CvB2_`year'.sav", case(lower) clear
		}
		
		g gsrt4 = substr(gsrt,1,4)

		// Imports
		bysort be_id: egen imp_tot = total(invoer)
		bysort be_id: egen imp_mach = total(invoer*(gsrt4=="8479"))
		bysort be_id: egen imp_robots = total(invoer*(gsrt=="84795000"))
		bysort be_id: egen imp_robots_r = total(invoer*(gsrt=="84795000")*(respons=="R"))

		
		// Exports
		bysort be_id: egen exp_tot = total(uitvoer)
		bysort be_id: egen exp_mach = total(uitvoer*(gsrt4=="8479"))
		bysort be_id: egen exp_robots = total(uitvoer*(gsrt=="84795000"))

		// Re-exports
		bysort be_id: egen reexp_tot = total(wederuitvoer)
		bysort be_id: egen reexp_mach = total(wederuitvoer*(gsrt4=="8479"))
		bysort be_id: egen reexp_robots = total(wederuitvoer*(gsrt=="84795000"))

		destring land_hb, force replace
		merge m:1 land_hb using "Robots and Workers\Data\IGH_temp.dta", nogen keep(1 3)
		bysort be_id: egen EUthreshold = total(invoer*(tradeEU==1))
		bysort be_id: replace tradeEU = EUthreshold/imp_tot
		replace EUthreshold = EUthreshold>1200000
		
		duplicates drop be_id, force
		keep be_id *EU* imp_* exp_* reexp_*
		g year = `year'
		order be_id year *EU* imp_* exp_* reexp_*
		drop if be_id == .
		
		// Save by year
		compress
		do "Robots and Workers\Do-files\Variable labels"
		save "Robots and Workers\Data\IHG_`year'.dta", replace
		
		local d = `d'+1
	}

	// Append the yearly data
	clear all
	forvalues year = $minyear(1)$maxyear {
		append using "Robots and Workers\Data\IHG_`year'.dta"
		save "Robots and Workers\Data\IHG_$minyear$maxyear.dta", replace
	}

	sort be_id year
	rename be_id beid
	save "Robots and Workers\Data\IHG_$minyear$maxyear.dta", replace
	erase "Robots and Workers\Data\IGH_temp.dta"
	
	// Create balanced panel of firms to calculate cumulative investments
	// 	use "Robots and Workers\Data\IHG_$minyear$maxyear.dta", clear
	// 	duplicates drop beid, force
	// 	keep beid
	// 	save "Robots and Workers\Data\temp_IHG_$minyear$maxyear.dta", replace
	//	
	// 	clear all
	// 	g year = .
	// 	forvalues year = $minyear(1)$maxyear {
	// 		append using "Robots and Workers\Data\temp_IHG_$minyear$maxyear.dta"
	// 		replace year = `year' if year == .
	// 		erase "Robots and Workers\Data\IHG_`year'.dta"
	// 	}

	clear all
	g year = .
	forvalues year = $minyear(1)$maxyear {
		append using "H:\Robots and Workers\Data\BEID_list.dta"
		replace year = `year' if year == .
		erase "Robots and Workers\Data\IHG_`year'.dta"
	}
	
	merge 1:1 beid year using "Robots and Workers\Data\IHG_$minyear$maxyear.dta", nogen keep(1 3)
	capture erase "Robots and Workers\Data\temp_IHG_$minyear$maxyear.dta"
	
	sort beid year
	order year, after(beid)
	
	foreach var in imp_tot imp_mach imp_robots imp_robots_r exp_tot exp_mach exp_robots reexp_tot reexp_mach reexp_robots {
		replace `var' = 0 if `var' == .
		g c`var' = `var' if year == $minyear
		replace c`var' = `var'+c`var'[_n-1] if beid==beid[_n-1]
		order c`var', after(`var')
	}
	replace tradeEU = 0 if tradeEU == .
	replace EUthreshold = 1 if EUthreshold == .
	
	compress
	do "Robots and Workers\Do-files\Variable labels"
	save "Robots and Workers\Data\IHG_panel_$minyear$maxyear.dta", replace
	
	// Consider above-median cumulative investments in robots
	centile cimp_robots if cimp_robots>0, centile(50)
	local threshold = r(c_1)
	
	local threshold = $minrobotinv
	g robots = cimp_robots>`threshold' & cimp_robots!=.
	g robots_r = cimp_robots_r>`threshold' & cimp_robots_r!=.	
	g robotsfirm = cimp_robots>`threshold' & cimp_robots!=.
	bysort beid: ereplace robotsfirm = max(robotsfirm)
	
	g robotsyear = year if robots==1
	bysort beid: ereplace robotsyear = min(robotsyear)
	gsort -robotsfirm beid year
	g robots_tmin5 = year-robotsyear==-5 & robotsfirm==1 if year <= $maxyear-5		
	g robots_tmin4 = year-robotsyear==-4 & robotsfirm==1 if year <= $maxyear-4	
	g robots_tmin3 = year-robotsyear==-3 & robotsfirm==1 if year <= $maxyear-3	
	g robots_tmin2 = year-robotsyear==-2 & robotsfirm==1 if year <= $maxyear-2	
	g robots_tmin1 = year-robotsyear==-1 & robotsfirm==1 if year <= $maxyear-1
	g robots_t0 = year-robotsyear==0 & robotsfirm==1
	g robots_t1 = year-robotsyear==1 & robotsfirm==1 if year >= $minyear+1	
	g robots_t2 = year-robotsyear==2 & robotsfirm==1 if year >= $minyear+2	
	g robots_t3 = year-robotsyear==3 & robotsfirm==1 if year >= $minyear+3
	g robots_t4 = year-robotsyear==4 & robotsfirm==1 if year >= $minyear+4
	g robots_gte1 = year-robotsyear>=1 & robotsfirm==1 if year >= $minyear+1	
	g robots_gte2 = year-robotsyear>=2 & robotsfirm==1 if year >= $minyear+2	
	g robots_gte3 = year-robotsyear>=3 & robotsfirm==1 if year >= $minyear+3		
	g robots_gte4 = year-robotsyear>=4 & robotsfirm==1 if year >= $minyear+4	
	g robots_gte5 = year-robotsyear>=5 & robotsfirm==1 if year >= $minyear+5		
	
	sort beid year
	g cimp_robots_t0 = cimp_robots*(robots_t0==1)
	bysort beid: ereplace cimp_robots_t0 = max(cimp_robots_t0)
	replace cimp_robots_t0 = cimp_robots_t0*robots
	g robots_mt = cimp_robots>(cimp_robots_t0+`threshold') if robots!=.
	
	drop robotsyear cimp_robots_t0
	
	do "Robots and Workers\Do-files\Variable labels"
	compress
	save "Robots and Workers\Data\IHG_panel_$minyear$maxyear.dta", replace
}



////////////////////////////////////////////////////////////////////////////////
//                        2) Compile investeringen data                       //
////////////////////////////////////////////////////////////////////////////////
if $Investeringen == 1 {
forvalues year = 2000(1)2011 {
	*NOTE: Before 2000 the sample is just about 15% of the sample later on (in 1999, N=143588, in 2000=711921)
	
	// Open files
	if `year' == 1995 | `year' == 1996 | `year' == 1997 | `year' == 1998 | `year' == 1999 {		
		import spss using "G:\Bedrijven\INVESTERINGEN\\`year'\090206 Investeringen_main `year'V1.sav", case(lower) clear
	}
	if `year' == 2000 | `year' == 2001 | `year' == 2002 | `year' == 2003 | `year' == 2004  | `year' == 2005 | `year' == 2006 {	
		import spss using "G:\Bedrijven\INVESTERINGEN\\`year'\090129 Investeringen_main `year'V1.sav", case(lower) clear
	}
	if `year' == 2007 {
		import spss using "G:\Bedrijven\INVESTERINGEN\\`year'\091028 Investeringen_main `year'V1.sav", case(lower) clear	
	}
	if `year' == 2008 {
		import spss using "G:\Bedrijven\INVESTERINGEN\\`year'\101018 Investeringen_main `year'V1.sav", case(lower) clear
	}
	if `year' == 2009 {
		import spss using "G:\Bedrijven\INVESTERINGEN\\`year'\111027 Investeringen_main `year'V1.sav", case(lower) clear
	}
	if `year' == 2010 {		
		import spss using "G:\Bedrijven\INVESTERINGEN\\`year'\121010 Investeringen_main `year'V1.sav", case(lower) clear
	}
	if `year' == 2011 {
		import spss using "G:\Bedrijven\INVESTERINGEN\\`year'\Investeringen_main `year'V3.sav", case(lower) clear
	}
	if `year' >= 1995 & `year' < 2000 {
		local prefix 93
	}
		
	// Create relevant variables
	* Note: software is only available from 2012 onwards!
	
	if `year' == 2000 {
		local prefix 00
	}
	if `year' > 2000 {
		local prefix eu01
	}
	
	g inv_total = alo`prefix'nr9*1000
	g inv_computers = alo`prefix'nr6*1000
	if `year' >= 1995 & `year' < 2000 {
		g inv_mach = alo`prefix'nr5*1000 + alo`prefix'nr7*1000
	}
	if `year' >= 2000 {
		g inv_mach = alo`prefix'nr57*1000
	}
	g inv_realestate = (alo`prefix'nr1+alo`prefix'nr2+alo`prefix'nr3)*1000
	g inv_transport = alo`prefix'nr4*1000
	
	foreach var in inv_realestate inv_transport inv_computers inv_mach {
		replace `var' = 0 if inv_total!=. & `var' == .
	}
	drop if inv_total==.
	
	capture rename gk_var02 gksbs
	capture rename be_id beid
	capture destring beid, force replace
	capture rename gk_sbs gksbs
	if `year'<= 2008 {
		rename (sbi93_5d gksbs) (sbi93_5 firmsize_class)
	}
	if `year'> 2008 {
		rename (sbi08_5d gksbs) (sbi08_5 firmsize_class)
	}	
	capture keep beid sbi93_5 firmsize_class inv_*
	capture keep beid sbi08_5 firmsize_class inv_*
	g year = `year'
	order beid year sbi* firmsize_class inv_*
	duplicates drop beid, force
	compress
	save "Robots and Workers\Data\Investments_`year'.dta", replace
}

// 2012 - 2019 (is an extended version with alternative variable names and new variables)
forvalues year = 2012(1)$maxyear {
	if `year' == 2012 {
		import spss using "G:\Bedrijven\INVESTERINGEN\\`year'\Investeringen_nationaal  `year'V2.sav", clear case(lower)
	}
	if `year' == 2013 {
		import spss using "G:\Bedrijven\INVESTERINGEN\\`year'\Investeringen_nationaal  `year'V1.sav", case(lower) clear
	}
	if `year'== 2014 | `year' == 2015 {
		import spss using "G:\Bedrijven\INVESTERINGEN\\`year'\Investeringen_nationaal`year'V1.sav", case(lower) clear
	}
	if `year' == 2016 | `year' == 2018 {	
	import spss using "G:\Bedrijven\INVESTERINGEN\\`year'\Investeringen_nationaal`year'V3.sav", case(lower) clear
	}
	if `year' == 2017 | `year' >= 2019 {	
	import spss using "G:\Bedrijven\INVESTERINGEN\\`year'\Investeringen_nationaal`year'V2.sav", case(lower) clear	
	}	
		g inv_total = c014004*1000
		g inv_realestate = (c001004+c002004+c003004+c004004)*1000
		g inv_transport = (c005004+c006004+c007004+c008004+c009004)*1000
		g inv_computers = c010004*1000
		g inv_mach = c012004*1000
		g inv_softw = c015004*1000
		g inv_specsoftw = (c015002+ c015003)*1000

		foreach var in inv_realestate inv_transport inv_computers inv_mach inv_softw inv_specsoftw {
			replace `var' = 0 if inv_total!=. & `var' == .
		}
	drop if inv_total==.
	
	capture rename be_id beid
	capture destring beid, force replace
	capture rename gk_sbs gksbs
	rename (sbi08_5d gksbs) (sbi08_5 firmsize_class)
		
	keep beid sbi08_5 firmsize_class inv_*
	g year = `year'
	order beid year sbi firmsize_class inv_*
	duplicates drop beid, force
	compress
	save "Robots and Workers\Data\Investments_`year'.dta", replace
}

// Create BALANCED panel of firms
clear all
forvalues year = 2000(1)$maxyear {
	append using "Robots and Workers\Data\Investments_`year'.dta"
}	
keep beid
duplicates drop beid, force
save "Robots and Workers\Data\Investments_temp.dta", replace

clear all
g year = .
forvalues year = 2000(1)$maxyear {
	append using "Robots and Workers\Data\Investments_temp.dta"
	replace year = `year' if year == .
	merge 1:1 beid year using "Robots and Workers\Data\Investments_`year'.dta", keep(1 3 4 5) update nogen
	//erase "Robots and Workers\Data\Investments_`year'.dta"
}	

sort beid year
foreach var in inv_total inv_realestate inv_transport inv_computers inv_mach inv_softw inv_specsoftw {
	replace `var' = 0 if `var' == .
	g c`var' = `var' if beid!=beid[_n-1]
	replace c`var' = `var'+c`var'[_n-1] if beid==beid[_n-1]
	order c`var', after(`var')
}
do "Robots and Workers\Do-files\Variable labels"
	
order sbi08_5, after(sbi93_5)	

drop if year < ($minyear-5)

save "Robots and Workers\Data\Investments.dta", replace
}


